In [2]:
import numpy as np 
import pandas as pd 
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go 
pio.templates.default = 'plotly_white'
In [3]:
df = pd.read_csv('supply_chain_data.csv')
In [4]:
df.sample(10)
Out[4]:
Product type SKU Price Availability Number of products sold Revenue generated Customer demographics Stock levels Lead times Order quantities ... Location Lead time Production volumes Manufacturing lead time Manufacturing costs Inspection results Defect rates Transportation modes Routes Costs
88 cosmetics SKU88 75.270407 58 737 9444.742033 Male 60 18 85 ... Mumbai 21 953 11 68.184919 Pending 0.722204 Sea Route A 103.916248
91 cosmetics SKU91 62.111965 90 916 1935.206794 Male 98 22 85 ... Delhi 5 207 28 39.772883 Pending 0.626002 Rail Route B 996.778315
11 skincare SKU11 90.635460 95 960 6099.944116 Female 46 23 60 ... Kolkata 28 362 11 27.592363 Pending 0.021170 Air Route A 126.723033
50 cosmetics SKU50 14.203484 91 633 5910.885390 Female 31 23 82 ... Delhi 20 306 21 45.178758 Fail 4.754801 Rail Route B 496.248650
58 skincare SKU58 59.841561 81 896 2021.149810 Non-binary 10 5 44 ... Delhi 18 396 7 65.047415 Fail 1.730375 Road Route B 110.364335
47 skincare SKU47 95.712136 93 910 7089.474250 Male 4 15 51 ... Kolkata 10 964 20 19.712993 Pending 0.380574 Rail Route A 581.602355
99 haircare SKU99 68.517833 17 627 9185.185829 Unknown 55 8 59 ... Chennai 29 921 2 38.072899 Fail 0.346027 Rail Route B 210.743009
49 cosmetics SKU49 78.897913 19 99 8001.613207 Unknown 97 24 9 ... Delhi 28 177 28 14.147815 Pass 2.825814 Rail Route A 336.890169
78 haircare SKU78 6.306883 50 946 1292.458418 Unknown 5 4 51 ... Mumbai 25 858 21 71.126515 Pending 4.096881 Sea Route C 323.592203
32 skincare SKU32 79.209936 43 781 9571.550487 Unknown 89 13 64 ... Kolkata 30 737 7 29.692467 Pass 1.946036 Road Route A 761.173910

10 rows × 24 columns

In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Product type             100 non-null    object 
 1   SKU                      100 non-null    object 
 2   Price                    100 non-null    float64
 3   Availability             100 non-null    int64  
 4   Number of products sold  100 non-null    int64  
 5   Revenue generated        100 non-null    float64
 6   Customer demographics    100 non-null    object 
 7   Stock levels             100 non-null    int64  
 8   Lead times               100 non-null    int64  
 9   Order quantities         100 non-null    int64  
 10  Shipping times           100 non-null    int64  
 11  Shipping carriers        100 non-null    object 
 12  Shipping costs           100 non-null    float64
 13  Supplier name            100 non-null    object 
 14  Location                 100 non-null    object 
 15  Lead time                100 non-null    int64  
 16  Production volumes       100 non-null    int64  
 17  Manufacturing lead time  100 non-null    int64  
 18  Manufacturing costs      100 non-null    float64
 19  Inspection results       100 non-null    object 
 20  Defect rates             100 non-null    float64
 21  Transportation modes     100 non-null    object 
 22  Routes                   100 non-null    object 
 23  Costs                    100 non-null    float64
dtypes: float64(6), int64(9), object(9)
memory usage: 18.9+ KB
In [6]:
print(df['Location'].unique())
print(df['Product type'].unique())
print(df['Transportation modes'].unique())
['Mumbai' 'Kolkata' 'Delhi' 'Bangalore' 'Chennai']
['haircare' 'skincare' 'cosmetics']
['Road' 'Air' 'Rail' 'Sea']
In [7]:
df.describe()
Out[7]:
Price Availability Number of products sold Revenue generated Stock levels Lead times Order quantities Shipping times Shipping costs Lead time Production volumes Manufacturing lead time Manufacturing costs Defect rates Costs
count 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.000000 100.00000 100.000000 100.000000 100.000000
mean 49.462461 48.400000 460.990000 5776.048187 47.770000 15.960000 49.220000 5.750000 5.548149 17.080000 567.840000 14.77000 47.266693 2.277158 529.245782
std 31.168193 30.743317 303.780074 2732.841744 31.369372 8.785801 26.784429 2.724283 2.651376 8.846251 263.046861 8.91243 28.982841 1.461366 258.301696
min 1.699976 1.000000 8.000000 1061.618523 0.000000 1.000000 1.000000 1.000000 1.013487 1.000000 104.000000 1.00000 1.085069 0.018608 103.916248
25% 19.597823 22.750000 184.250000 2812.847151 16.750000 8.000000 26.000000 3.750000 3.540248 10.000000 352.000000 7.00000 22.983299 1.009650 318.778455
50% 51.239831 43.500000 392.500000 6006.352023 47.500000 17.000000 52.000000 6.000000 5.320534 18.000000 568.500000 14.00000 45.905622 2.141863 520.430444
75% 77.198228 75.000000 704.250000 8253.976921 73.000000 24.000000 71.250000 8.000000 7.601695 25.000000 797.000000 23.00000 68.621026 3.563995 763.078231
max 99.171329 100.000000 996.000000 9866.465458 100.000000 30.000000 96.000000 10.000000 9.929816 30.000000 985.000000 30.00000 99.466109 4.939255 997.413450
In [8]:
plt = px.scatter(df, x='Price', y='Revenue generated',
                 color='Product type',
                 hover_data=['Number of products sold'],
                 trendline="ols",
                 title='Revenue Generation by Product Type')
plt.show()
In [9]:
sales = df.groupby('Product type')['Number of products sold'].sum().reset_index()
pieChart = px.pie(sales, values='Number of products sold', names='Product type',
                  title='Sales by Product type',
                  hover_data=['Number of products sold'],
                  hole=0.5,
                  color_discrete_sequence=px.colors.qualitative.Pastel)
pieChart.update_traces(textposition='inside', textinfo='percent+label')
# pieChart.write_html('first_figure.html', auto_open=True)
pieChart.show()
In [10]:
revenue = df.groupby('Shipping carriers')['Revenue generated'].sum().reset_index()
bar = go.Figure()
bar.add_trace(go.Bar(x=revenue['Shipping carriers'], 
                     y=revenue['Revenue generated']))
bar.update_layout(title='Total Revenue by Shipping Carrier', 
                  xaxis_title='Shipping Carrier', 
                  yaxis_title='Revenue Generated')
bar.show()
In [11]:
avgTime = df.groupby('Product type')['Lead time'].mean().reset_index()
avgManCost = df.groupby('Product type')['Manufacturing costs'].mean().reset_index()
result = pd.merge(avgTime, avgManCost, on='Product type')
result.rename(columns={'Lead time': 'Average Lead Time', 'Menufacturing costs':'Avarage Menufacturing Costs'}, inplace=True)
print(result)
  Product type  Average Lead Time  Manufacturing costs
0    cosmetics          13.538462            43.052740
1     haircare          18.705882            48.457993
2     skincare          18.000000            48.993157
In [12]:
revenue = px.line(df, x='SKU', y= 'Revenue generated', title='Revenue Generated by SKU')
revenue.show()
In [13]:
stock = px.line(df, x='SKU', y= 'Stock levels', title='Stock Levels by SKU')
stock.show()
In [14]:
order = px.bar(df, x='SKU', y= 'Order quantities', title='Order quantities by SKU')
order.show()
In [15]:
shippingCost = px.bar(df, x='Shipping carriers', y= 'Shipping costs',
                      title='Shipping Costs by Carrier')
shippingCost.show()
In [16]:
transportation = px.pie(df, values='Costs',
                        names='Transportation modes',
                        title='Cost Distribution by Transportation',
                        hole=0.5,
                        color_discrete_sequence=px.colors.qualitative.Pastel)
transportation.update_traces(textposition='inside', textinfo='percent+label')
transportation.show()
In [17]:
defectRate = df.groupby('Product type')['Defect rates'].mean().reset_index()
plot = px.bar(defectRate, x='Product type', y='Defect rates',
               title='Avarage Defect Rates by Product Type')
plot.show()
In [18]:
defectRate = df.groupby('SKU')['Defect rates'].mean().reset_index()
plot = px.line(defectRate, x='SKU', y='Defect rates',
               title='Avarage Defect Rates by SKU')
plot.show()
In [19]:
pivotTable = pd.pivot_table(df, values='Defect rates',
                            index=['Transportation modes'],
                            aggfunc='mean')
chart = px.pie(values=pivotTable['Defect rates'], names=pivotTable.index,
               hole=0.5, title='Defect Rates per Transportation modes',
               color_discrete_sequence=px.colors.qualitative.Pastel)
chart.update_traces(textposition='inside', textinfo='percent+label')
# update_traces(textposition='inside', textinfo='percent+label')
chart.show()
In [20]:
defect1 = df.groupby(['Transportation modes','Product type'])['Defect rates'].mean().reset_index()
defect2 = df.groupby(['Transportation modes','Product type'])['Order quantities'].sum().reset_index()
defect=pd.merge(defect2, defect1, on=['Transportation modes', 'Product type'])
print(defect)
   Transportation modes Product type  Order quantities  Defect rates
0                   Air    cosmetics               230      0.399381
1                   Air     haircare               403      2.924709
2                   Air     skincare               708      1.694418
3                  Rail    cosmetics               463      2.158841
4                  Rail     haircare               371      1.959318
5                  Rail     skincare               508      2.878227
6                  Road    cosmetics               268      2.703675
7                  Road     haircare               543      2.238732
8                  Road     skincare               575      2.992763
9                   Sea    cosmetics               382      2.042156
10                  Sea     haircare               163      3.642869
11                  Sea     skincare               308      1.790767
In [21]:
plot = px.bar(defect, x= 'Transportation modes',
                  y='Order quantities',
                  color='Product type',
                  barmode='group',
                  text_auto='.2s',
                  title='Order quantities by catagories')
plot.show()
In [22]:
plot = px.bar(defect, x= 'Transportation modes',
                  y='Defect rates',
                  color='Product type',
                  barmode='group',
                  text_auto='.2s',
                  title='Order quantities by catagories')
plot.show()